Recognition result specification (JSON)
RecognitionResult
Root element of the recognition result
property | type | required | description |
---|---|---|---|
resultSchemaVersion | string | Recognition result schema version in major.minor.patch format, where major, minor and patch are non-negative integers. | |
dataFieldResults | map(string,DataFieldResult) | Results map where the key is the name of a data field and the value is its result |
RecognitionResult json example
Click to expand json
{
"resultSchemaVersion": "1.0.0",
"dataFieldResults": {
"M1": <DataFieldResult>,
...,
"MN": <DataFieldResult>
}
}
DataFieldResult
property | type | required | description |
---|---|---|---|
dataType | string | The type of the data field as specified in the template. | |
results | list(ResultValue) | The list of recognition results, each of which can be one of several types: TEXT, IMAGE, TABLE, GROUP. |
DataFieldResult json example
Click to expand json
{
"dataType": "root",
"results": [
<ResultValue>,
...,
<ResultValue>
]
}
ResultValue
TextResult
property | type | required | description |
---|---|---|---|
resultType | string | The type of value result. Always TEXT . | |
content | string | Extracted text data. | |
pageLocationMeta | PageLocationMeta | Describes the location of the result within the PDF file. | |
fontMeta | FontMeta | Contains information about font of the result content. |
TextResult json without meta example
Click to expand json
{
"resultType": "TEXT",
"content": "st nd"
}
TextResult json with meta example
Click to expand json
{
"resultType": "TEXT",
"pageLocationMeta": <PageLocationMeta>,
"fontMeta": <FontMeta>,
"content": "st nd"
}
ImageResult
property | type | required | description |
---|---|---|---|
resultType | string | The type of value result. Always IMAGE . | |
base64 | string | Representation of the extracted image bytes as base64 string. | |
pageLocationMeta | PageLocationMeta | Describes the location of the result within the PDF file. |
ImageResult json without meta example
Click to expand json
{
"resultType": "IMAGE",
"base64": "abcdefghijk"
}
ImageResult json with meta example
Click to expand json
{
"resultType": "IMAGE",
"pageLocationMeta": <PageLocationMeta>,
"base64": "abcdefghijk"
}
TableResult
property | type | required | description |
---|---|---|---|
resultType | string | The type of value result. Always TABLE . | |
rows | list(TableRowResult) | The list of table row results. | |
pageLocationMetas | list(PageLocationMeta) | Describes the locations of the result within the PDF file. Contains multiple values when the table spans several pages. |
TableResult without meta json example
Click to expand json
{
"resultType": "TABLE",
"rows": [
<TableRowResult>,
...,
<TableRowResult>
]
}
TableResult with meta json example
Click to expand json
{
"resultType": "TABLE",
"pageLocationMetas": [
<PageLocationMeta>,
...,
<PageLocationMeta>
],
"rows": [
<TableRowResult>,
...,
<TableRowResult>
]
}
TableRowResult
property | type | required | description |
---|---|---|---|
resultType | string | The type of value result. Always TABLE_ROW . | |
cells | list(TableCellResult) | The list of table cells in the row. | |
pageLocationMeta | PageLocationMeta | Describes the location of the result within the PDF file. |
TableRowResult json without meta example
Click to expand json
{
"resultType": "TABLE_ROW",
"cells": [
<TableCellResult>,
...,
<TableCellResult>
]
}
TableRowResult json with meta example
Click to expand json
{
"resultType": "TABLE_ROW",
"pageLocationMeta": <PageLocationMeta>,
"cells": [
<TableCellResult>,
...,
<TableCellResult>
]
}
TableCellResult
property | type | required | description |
---|---|---|---|
resultType | string | The type of value result. Always TABLE_CELL . | |
content | string | Text data extracted from the cell. | |
pageLocationMeta | PageLocationMeta | Describes the location of the result within the PDF file. | |
fontMeta | FontMeta | Contains information about font of the result content. | |
rowspan | int | Specifies the number of rows a cell should span. | |
colspan | int | Specifies the number of columns a cell should span. |
TableCellResult without meta json example
Click to expand json
{
"resultType": "TABLE_CELL",
"content": "Key"
}
TableCellResult with meta json example
Click to expand json
{
"resultType": "TABLE_CELL",
"pageLocationMeta": <PageLocationMeta>,
"fontMeta": <FontMeta>,
"content": "Key"
}
GroupResult
property | type | required | description |
---|---|---|---|
resultType | string | The type of value result. Always GROUP . | |
entries | map(string,GroupEntryResult) | Map of grouped result entries, where the key is the name of the nested data field and the value is its result |
GroupResult json example
Click to expand json
{
"resultType": "GROUP",
"entries": {
"GroupEntry1": <GroupEntryResult>,
...,
"GroupEntryN": <GroupEntryResult>
}
}
GroupEntryResult
property | type | required | description |
---|---|---|---|
resultType | string | The type of value result. Always GROUP_ENTRY. | |
dataType | string | The type of the nested data field as specified in the template. | |
results | list(ResultValue) | The list of recognition results, each of which can be one of several types: TEXT, IMAGE, TABLE, GROUP. |
GroupEntryResult json example
Click to expand json
{
"resultType": "GROUP_ENTRY",
"dataType": "dataType",
"results": [
<ResultValue>,
...,
<ResultValue>
]
}
Meta
PageLocationMeta
property | type | required | description |
---|---|---|---|
x | double | The x coordinate on the page. | |
y | double | The y coordinate on the page. | |
width | double | The width of the location. | |
height | double | The height of the location. | |
page | int | The page number. |
PageLocationMeta json example
Click to expand json
{
"x": 176.8,
"y": 543.52,
"width": 34.1,
"height": 6.42,
"page": 2
}
FontMeta
property | type | required | description |
---|---|---|---|
fontName | string | The font name. | |
fontStyle | string | The font style. Possible values: NORMAL, BOLD, ITALIC, BOLD_ITALIC. | |
fontColor | string | The font color. The format is rrggbb, where rr, gg and bb are the two-digit hexadecimal values of the red, green and blue color components. |
FontMeta json example
Click to expand json
{
"fontName": "TimesNewRomanPSMT",
"fontStyle": "NORMAL",
"fontColor": "000000"
}
Complete example
RecognitionResult without meta json example
Click to expand json
{
"resultSchemaVersion": "1.0.0",
"dataFieldResults": {
"TextField": {
"dataType": "root",
"results": [
{
"resultType": "TEXT",
"content": "st nd"
}
]
},
"ImageField": {
"dataType": "root",
"results": [
{
"resultType": "IMAGE",
"base64": "abcdefghijk"
}
]
},
"TableField": {
"dataType": "root",
"results": [
{
"resultType": "TABLE",
"rows": [
{
"resultType": "TABLE_ROW",
"cells": [
{
"resultType": "TABLE_CELL",
"content": "Key"
},
{
"resultType": "TABLE_CELL",
"content": "Key",
"rowspan": 2,
"colspan": 2
}
]
}
]
}
]
},
"GroupParentField": {
"dataType": "root",
"results": [
{
"resultType": "GROUP",
"entries": {
"GroupNestedField": {
"resultType": "GROUP_ENTRY",
"dataType": "dataType",
"results": [
{
"resultType": "TEXT",
"content": "Group Text"
}
]
}
}
}
]
}
}
}
RecognitionResult with meta json example
Click to expand json
{
"resultSchemaVersion": "1.0.0",
"dataFieldResults": {
"TextField": {
"dataType": "root",
"results": [
{
"resultType": "TEXT",
"pageLocationMeta": {
"x": 176.8,
"y": 543.52,
"width": 34.1,
"height": 6.42,
"page": 2
},
"fontMeta": {
"fontName": "TimesNewRomanPSMT",
"fontStyle": "NORMAL",
"fontColor": "000000"
},
"content": "st nd"
}
]
},
"ImageField": {
"dataType": "root",
"results": [
{
"resultType": "IMAGE",
"pageLocationMeta": {
"x": 160.8,
"y": 400.31,
"width": 20.1,
"height": 7.42,
"page": 2
},
"base64": "abcdefghijk"
}
]
},
"TableField": {
"dataType": "root",
"results": [
{
"resultType": "TABLE",
"pageLocationMetas": [
{
"x": 176.8,
"y": 543.52,
"width": 34.1,
"height": 6.42,
"page": 2
}
],
"rows": [
{
"resultType": "TABLE_ROW",
"pageLocationMeta": {
"x": 176.8,
"y": 543.52,
"width": 34.1,
"height": 6.42,
"page": 2
},
"cells": [
{
"resultType": "TABLE_CELL",
"pageLocationMeta": {
"x": 176.8,
"y": 543.52,
"width": 34.1,
"height": 6.42,
"page": 2
},
"fontMeta": {
"fontName": "TimesNewRomanPSMT",
"fontStyle": "NORMAL",
"fontColor": "000000"
},
"content": "Key"
},
{
"resultType": "TABLE_CELL",
"pageLocationMeta": {
"x": 176.8,
"y": 350.9,
"width": 34.1,
"height": 6.42,
"page": 2
},
"fontMeta": {
"fontName": "TimesNewRomanPSMT",
"fontStyle": "NORMAL",
"fontColor": "000000"
},
"content": "Key",
"colspan": 2,
"rowspan": 2
}
]
}
]
}
]
},
"GroupParentField": {
"dataType": "root",
"results": [
{
"resultType": "GROUP",
"entries": {
"GroupNestedField": {
"resultType": "GROUP_ENTRY",
"dataType": "dataType",
"results": [
{
"resultType": "TEXT",
"pageLocationMeta": {
"x": 176.8,
"y": 543.52,
"width": 34.1,
"height": 6.42,
"page": 2
},
"fontMeta": {
"fontName": "TimesNewRomanPSMT",
"fontStyle": "NORMAL",
"fontColor": "000000"
},
"content": "Group Text"
}
]
}
}
}
]
}
}
}
JSON schema
Click to expand json schema
{
"$schema": "http://json-schema.org/draft-07/schema#",
"$defs": {
"abstractResult": {
"type": "object",
"properties": {
"resultType": {
"enum": [
"TEXT",
"IMAGE",
"TABLE",
"TABLE_ROW",
"TABLE_CELL",
"GROUP",
"GROUP_ENTRY"
]
}
},
"required": [
"resultType"
],
"allOf": [
{
"if": {
"properties": {
"resultType": {
"const": "TEXT"
}
}
},
"then": {
"$ref": "#/$defs/textResult"
}
},
{
"if": {
"properties": {
"resultType": {
"const": "IMAGE"
}
}
},
"then": {
"$ref": "#/$defs/imageResult"
}
},
{
"if": {
"properties": {
"resultType": {
"const": "TABLE"
}
}
},
"then": {
"$ref": "#/$defs/tableResult"
}
},
{
"if": {
"properties": {
"resultType": {
"const": "TABLE_ROW"
}
}
},
"then": {
"$ref": "#/$defs/tableRowResult"
}
},
{
"if": {
"properties": {
"resultType": {
"const": "TABLE_CELL"
}
}
},
"then": {
"$ref": "#/$defs/tableCellResult"
}
},
{
"if": {
"properties": {
"resultType": {
"const": "GROUP"
}
}
},
"then": {
"$ref": "#/$defs/groupResult"
}
},
{
"if": {
"properties": {
"resultType": {
"const": "GROUP_ENTRY"
}
}
},
"then": {
"$ref": "#/$defs/groupEntryResult"
}
}
]
},
"textResult": {
"type": "object",
"properties": {
"resultType": {
"const": "TEXT"
},
"pageLocationMeta": {
"$ref": "#/$defs/pageLocationMeta"
},
"fontMeta": {
"$ref": "#/$defs/fontMeta"
},
"content": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
"resultType",
"content"
]
},
"imageResult": {
"type": "object",
"properties": {
"resultType": {
"const": "IMAGE"
},
"pageLocationMeta": {
"$ref": "#/$defs/pageLocationMeta"
},
"base64": {
"type": "string",
"pattern": "^[a-zA-Z0-9+/]+={0,2}$"
}
},
"additionalProperties": false,
"required": [
"resultType",
"base64"
]
},
"tableResult": {
"type": "object",
"properties": {
"resultType": {
"const": "TABLE"
},
"pageLocationMetas": {
"type": "array",
"items": {
"$ref": "#/$defs/pageLocationMeta"
}
},
"rows": {
"type": "array",
"items": {
"$ref": "#/$defs/tableRowResult"
}
}
},
"additionalProperties": false,
"required": [
"resultType"
]
},
"tableRowResult": {
"type": "object",
"properties": {
"resultType": {
"const": "TABLE_ROW"
},
"pageLocationMeta": {
"$ref": "#/$defs/pageLocationMeta"
},
"cells": {
"type": "array",
"items": {
"$ref": "#/$defs/tableCellResult"
}
}
},
"additionalProperties": false,
"required": [
"resultType"
]
},
"tableCellResult": {
"type": "object",
"properties": {
"resultType": {
"const": "TABLE_CELL"
},
"pageLocationMeta": {
"$ref": "#/$defs/pageLocationMeta"
},
"fontMeta": {
"$ref": "#/$defs/fontMeta"
},
"colspan": {
"type": "integer",
"minimum": 1
},
"rowspan": {
"type": "integer",
"minimum": 1
},
"content": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
"resultType",
"content"
]
},
"groupResult": {
"type": "object",
"properties": {
"resultType": {
"const": "GROUP"
},
"entries": {
"type": "object",
"$comment": "Using additional properties as any property name is group entry name.",
"additionalProperties": {
"$ref": "#/$defs/groupEntryResult"
}
}
},
"additionalProperties": false,
"required": [
"resultType"
]
},
"groupEntryResult": {
"type": "object",
"properties": {
"resultType": {
"const": "GROUP_ENTRY"
},
"dataType": {
"type": "string"
},
"results": {
"type": "array",
"items": {
"$ref": "#/$defs/abstractResult"
}
}
},
"additionalProperties": false,
"required": [
"resultType"
]
},
"pageLocationMeta": {
"type": "object",
"properties": {
"x": {
"type": "number"
},
"y": {
"type": "number"
},
"width": {
"type": "number"
},
"height": {
"type": "number"
},
"page": {
"type": "integer"
}
},
"additionalProperties": false
},
"fontMeta": {
"type": "object",
"properties": {
"fontName": {
"type": "string"
},
"fontStyle": {
"enum": ["BOLD", "ITALIC", "BOLD_ITALIC", "NORMAL"]
},
"fontColor": {
"type": "string",
"pattern": "^#?[0-9a-fA-F]{6}$"
}
},
"additionalProperties": false
}
},
"type": "object",
"properties": {
"resultSchemaVersion": {
"type": "string",
"pattern": "^[0-9]+\\.[0-9]+\\.[0-9]+$"
},
"dataFieldResults": {
"type": "object",
"$comment": "Using additional properties as any property name is datafield name.",
"additionalProperties": {
"type": "object",
"properties": {
"dataType": {
"type": "string"
},
"results": {
"type": "array",
"items": {
"$ref": "#/$defs/abstractResult"
}
}
},
"additionalProperties": false
}
}
},
"additionalProperties": false,
"required": [
"resultSchemaVersion"
]
}